Rem
Rem $Header: olsexmpl.sql 09-jun-2004.17:32:14 lvbcheng Exp $
Rem
Rem olsexmpl.sql
Rem
Rem Copyright (c) 2004, Oracle. All rights reserved.  
Rem
Rem    NAME
Rem      olsexmpl.sql - OLS Regression Examples
Rem
Rem    DESCRIPTION
Rem      This file contains examples of using the OLS_Regression
Rem      type (defined in olstype.sql file).
Rem
Rem    NOTES
Rem      Requires that olstype.sql has been run.
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    lvbcheng    06/09/04 - lvbcheng_matrix_prototype
Rem    achaudhr    04/19/04 - Replace OLS_Array with UTL_NLA_ARRAY_DBL
Rem    achaudhr    03/30/04 - Created
Rem

Rem SQL*Plus session settings for this script. They control only how
Rem commands and query results are displayed or spooled.
Rem   ECHO ON       list each command of the script as it is executed
Rem   FEEDBACK 1    report a row count for statements affecting 1 or more rows
Rem   NUMWIDTH 10   default display width for NUMBER columns
Rem   LINESIZE 80   characters per output line
Rem   TRIMSPOOL ON  strip trailing blanks from spooled lines
Rem   TAB OFF       format white space with spaces rather than tab characters
Rem   PAGESIZE 100  lines per output page
SET ECHO ON
SET FEEDBACK 1
SET NUMWIDTH 10
SET LINESIZE 80
SET TRIMSPOOL ON
SET TAB OFF
SET PAGESIZE 100

Rem
Rem Data Set: Scottish Hill Races
Rem
Rem OLS Model: Time = c0 + (c1 * Dist) + (c2 * Climb)
Rem
Rem Description: We build an OLS model from a 15% sample of the data,
Rem and compare the model-predicted time with the actual time of each race.
Rem

Rem Recreate the HILLS table from scratch. On a first run the table does
Rem not exist yet, so the DROP reports an error. This file sets no
Rem WHENEVER SQLERROR handling, so by default the script carries on.
drop table hills;

Rem NAME is declared VARCHAR2 rather than VARCHAR, because Oracle reserves
Rem VARCHAR for possible future redefinition and recommends VARCHAR2.
Rem NOTE(review): units of DIST, CLIMB and TIME are not stated in this
Rem file (presumably miles, feet and minutes). Confirm against the
Rem published data set.
create table hills (
       name   varchar2(100), /* Name of the hill       */
       dist   number,        /* Overall race distance  */
       climb  number,        /* Total height climbed   */
       time   number         /* Record time            */
);

Rem Load the 35 race records, one single-row INSERT per race, in the
Rem column order (name, dist, climb, time).
Rem NOTE(review): the model query below draws a seeded sample from this
Rem table. Which rows are sampled may depend on how the rows are stored,
Rem so recheck the query output before reordering or merging the inserts.
insert into hills(name, dist, climb, time) values ('Greenmantle',       2.5, 650,  16.083);
insert into hills(name, dist, climb, time) values ('Carnethy',          6,   2500, 48.35);
insert into hills(name, dist, climb, time) values ('Craig Dunain',      6,   900,  33.65);
insert into hills(name, dist, climb, time) values ('Ben Rha',           7.5, 800,  45.6);
insert into hills(name, dist, climb, time) values ('Ben Lomond',        8,   3070, 62.267);
insert into hills(name, dist, climb, time) values ('Goatfell',          8,   2866, 73.217);
insert into hills(name, dist, climb, time) values ('Bens of Jura',      16,  7500, 204.617);
insert into hills(name, dist, climb, time) values ('Cairnpapple',       6,   800,  36.367);
insert into hills(name, dist, climb, time) values ('Scolty',            5,   800,  29.75);
insert into hills(name, dist, climb, time) values ('Traprain',          6,   650,  39.75);
insert into hills(name, dist, climb, time) values ('Lairig Ghru',       28,  2100, 192.667);
insert into hills(name, dist, climb, time) values ('Dollar',            5,   2000, 43.05);
insert into hills(name, dist, climb, time) values ('Lomonds',           9.5, 2200, 65);
insert into hills(name, dist, climb, time) values ('Cairn Table',       6,   500,  44.133);
insert into hills(name, dist, climb, time) values ('Eildon Two',        4.5, 1500, 26.933);
insert into hills(name, dist, climb, time) values ('Cairngorm',         10,  3000, 72.25);
insert into hills(name, dist, climb, time) values ('Seven Hills',       14,  2200, 98.417);
insert into hills(name, dist, climb, time) values ('Knock Hill',        3,   350,  78.65);
insert into hills(name, dist, climb, time) values ('Black Hill',        4.5, 1000, 17.417);
insert into hills(name, dist, climb, time) values ('Creag Beag',        5.5, 600,  32.567);
insert into hills(name, dist, climb, time) values ('Kildcon Hill',      3,   300,  15.95);
insert into hills(name, dist, climb, time) values ('Meall Ant-Suidhe',  3.5, 1500, 27.9);
insert into hills(name, dist, climb, time) values ('Half Ben Nevis',    6,   2200, 47.633);
insert into hills(name, dist, climb, time) values ('Cow Hill',          2,   900,  17.933);
insert into hills(name, dist, climb, time) values ('N Berwick Law',     3,   600,  18.683);
insert into hills(name, dist, climb, time) values ('Creag Dubh',        4,   2000, 26.217);
insert into hills(name, dist, climb, time) values ('Burnswark',         6,   800,  34.433);
insert into hills(name, dist, climb, time) values ('Largo Law',         5,   950,  28.567);
insert into hills(name, dist, climb, time) values ('Criffel',           6.5, 1750, 50.5);
insert into hills(name, dist, climb, time) values ('Acmony',            5,   500,  20.95);
insert into hills(name, dist, climb, time) values ('Ben Nevis',         10,  4400, 85.583);
insert into hills(name, dist, climb, time) values ('Knockfarrel',       6,   600,  32.383);
insert into hills(name, dist, climb, time) values ('Two Breweries',     18,  5200, 170.25);
insert into hills(name, dist, climb, time) values ('Cockleroi',         4.5, 850,  28.1);
insert into hills(name, dist, climb, time) values ('Moffat Chase',      20,  5000, 159.833);

Rem For every race, list the recorded time next to the time predicted by
Rem an OLS_Regression object built from a seeded 15 percent sample of HILLS.
Rem The inline view is an aggregate without GROUP BY, so it yields one row.
Rem The comma join is therefore an intentional cross product that attaches
Rem the same model object to each race.
select hills.time as "Actual Time",
       round(fit.ols.predict(UTL_NLA_ARRAY_DBL(hills.dist, hills.climb)), 3)
           as "Model Predicted Time"
from   hills,
       (select OLS_Regression(
                   /* mean and population variance of the response */
                   avg(time),
                   var_pop(time),
                   /* means of the two predictors */
                   UTL_NLA_ARRAY_DBL(avg(dist), avg(climb)),
                   /* predictor variances and covariance, upper triangle by row */
                   UTL_NLA_ARRAY_DBL(var_pop(dist), covar_pop(dist, climb),
                                     var_pop(climb)),
                   /* covariance of the response with each predictor */
                   UTL_NLA_ARRAY_DBL(covar_pop(time, dist), covar_pop(time, climb))
               ) as ols
        from   hills sample (15) seed (7)) fit
/


Rem
Rem The following is from a collection of tests for Linear Regression 
Rem that are available from NIST's Statistical Reference Datasets
Rem website (http://www.itl.nist.gov/div898/strd/)
Rem

Rem NIST/ITL StRD
Rem Dataset Name:  Wampler1 (Wampler1.dat)
Rem 
Rem File Format:   ASCII
Rem                Certified Values  (lines 31 to 50)
Rem                Data              (lines 61 to 81)
Rem 
Rem Procedure:     Linear Least Squares Regression
Rem 
Rem Reference:     Wampler, R. H. (1970). 
Rem                A Report of the Accuracy of Some Widely-Used Least 
Rem                Squares Computer Programs. 
Rem                Journal of the American Statistical Association, 65, pp. 549-565.
Rem            
Rem Data:          1 Response Variable (y)
Rem                1 Predictor Variable (x)
Rem                21 Observations
Rem                Higher Level of Difficulty
Rem                Generated Data
Rem 
Rem Model:         Polynomial Class
Rem                6 Parameters (B0,B1,...,B5)
Rem 
Rem                y = B0 + B1*x + B2*(x**2) + B3*(x**3)+ B4*(x**4) + B5*(x**5)
Rem 
Rem                Certified Regression Statistics
Rem 
Rem                                           Standard Deviation
Rem      Parameter        Estimate               of Estimate
Rem 
Rem         B0        1.00000000000000        0.000000000000000
Rem         B1        1.00000000000000        0.000000000000000
Rem         B2        1.00000000000000        0.000000000000000
Rem         B3        1.00000000000000        0.000000000000000
Rem         B4        1.00000000000000        0.000000000000000
Rem         B5        1.00000000000000        0.000000000000000
Rem 
Rem      Residual
Rem      Standard Deviation   0.000000000000000
Rem 
Rem      R-Squared            1.00000000000000
Rem 
Rem 
Rem                Certified Analysis of Variance Table
Rem 
Rem Source of Degrees of     Sums of               Mean  
Rem Variation  Freedom       Squares              Squares           F Statistic
Rem               
Rem Regression    5      18814317208116.7     3762863441623.33       Infinity
Rem Residual     15      0.000000000000000    0.000000000000000
Rem               
Rem Data:            y     x

Rem Recreate the WAMPLER1 table that holds the NIST observations.
drop table wampler1;

create table wampler1 (
       y  number,  /* response variable  */
       x  number   /* predictor variable */
);

Rem Load the 21 observations for x = 0 .. 20. Every y value equals
Rem 1 + x + x**2 + x**3 + x**4 + x**5 exactly, which is why the certified
Rem coefficients above are all 1 with zero residual.
Rem Columns are named explicitly so that the inserts do not depend on the
Rem column order of the table.
insert into wampler1 (y, x) values (      1,  0);
insert into wampler1 (y, x) values (      6,  1);
insert into wampler1 (y, x) values (     63,  2);
insert into wampler1 (y, x) values (    364,  3);
insert into wampler1 (y, x) values (   1365,  4);
insert into wampler1 (y, x) values (   3906,  5);
insert into wampler1 (y, x) values (   9331,  6);
insert into wampler1 (y, x) values (  19608,  7);
insert into wampler1 (y, x) values (  37449,  8);
insert into wampler1 (y, x) values (  66430,  9);
insert into wampler1 (y, x) values ( 111111, 10);
insert into wampler1 (y, x) values ( 177156, 11);
insert into wampler1 (y, x) values ( 271453, 12);
insert into wampler1 (y, x) values ( 402234, 13);
insert into wampler1 (y, x) values ( 579195, 14);
insert into wampler1 (y, x) values ( 813616, 15);
insert into wampler1 (y, x) values (1118481, 16);
insert into wampler1 (y, x) values (1508598, 17);
insert into wampler1 (y, x) values (2000719, 18);
insert into wampler1 (y, x) values (2613660, 19);
insert into wampler1 (y, x) values (3368421, 20);

Rem Fit the degree 5 polynomial to WAMPLER1 and report, for each
Rem coefficient B0 .. B5, its percentage deviation from the certified
Rem value 1. The powers of x are treated as five separate predictors.
with powers as (
       /* one row per observation, with x raised to the powers 1 to 5 */
       select y,
              x           as x1,
              power(x, 2) as x2,
              power(x, 3) as x3,
              power(x, 4) as x4,
              power(x, 5) as x5
       from   wampler1
     ),
     fit as (
       /* aggregate without GROUP BY, so a single row carrying the model */
       select OLS_Regression(
                  /* mean and population variance of the response */
                  avg(y),
                  var_pop(y),
                  /* means of the five predictors */
                  UTL_NLA_ARRAY_DBL(avg(x1), avg(x2), avg(x3), avg(x4), avg(x5)),
                  /* predictor variances and covariances, upper triangle by row */
                  UTL_NLA_ARRAY_DBL(
                      var_pop(x1), covar_pop(x1, x2), covar_pop(x1, x3), covar_pop(x1, x4), covar_pop(x1, x5),
                      var_pop(x2), covar_pop(x2, x3), covar_pop(x2, x4), covar_pop(x2, x5),
                      var_pop(x3), covar_pop(x3, x4), covar_pop(x3, x5),
                      var_pop(x4), covar_pop(x4, x5),
                      var_pop(x5)),
                  /* covariance of the response with each predictor */
                  UTL_NLA_ARRAY_DBL(covar_pop(y, x1), covar_pop(y, x2), covar_pop(y, x3),
                                    covar_pop(y, x4), covar_pop(y, x5))
              ) as ols
       from   powers
     )
select 100 * abs(1 - f.ols.getCoefficient(0)) as "%Error B0",
       100 * abs(1 - f.ols.getCoefficient(1)) as "%Error B1",
       100 * abs(1 - f.ols.getCoefficient(2)) as "%Error B2",
       100 * abs(1 - f.ols.getCoefficient(3)) as "%Error B3",
       100 * abs(1 - f.ols.getCoefficient(4)) as "%Error B4",
       100 * abs(1 - f.ols.getCoefficient(5)) as "%Error B5"
from   fit f;
